## -------------------------------------------------------------------------
## You have loaded plyr after dplyr - this is likely to cause problems.
## If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
## library(plyr); library(dplyr)
## -------------------------------------------------------------------------
## 
## Attaching package: 'plyr'
## 
## The following object is masked from 'package:lubridate':
## 
##     here
## 
## The following objects are masked from 'package:dplyr':
## 
##     arrange, count, desc, failwith, id, mutate, rename, summarise,
##     summarize
## 
## Loading required package: MASS
## 
## Attaching package: 'MASS'
## 
## The following object is masked from 'package:dplyr':
## 
##     select
## 
## Loading required package: Matrix
## Loading required package: lme4
## Loading required package: Rcpp
## 
## arm (Version 1.7-07, built: 2014-8-27)
## 
## Working directory is /Users/andreuboada/Dropbox/ESTADISTICA/itam-dm/alumnos/equipos/RandomBuddies
## 
## 
## Attaching package: 'arm'
## 
## The following object is masked from 'package:scales':
## 
##     rescale
## 
## Rattle: A free graphical interface for data mining with R.
## Versión 3.3.0 Copyright (c) 2006-2014 Togaware Pty Ltd.
## Escriba 'rattle()' para agitar, sacudir y  rotar sus datos.

## [1] 15
## Warning: Removed 1 rows containing missing values (stat_qq).

## Warning: Removed 1 rows containing missing values (stat_qq).

## Warning: Removed 8 rows containing missing values (stat_qq).

## Warning: Removed 1 rows containing missing values (stat_qq).

## Warning: Removed 10 rows containing missing values (stat_qq).

## Warning: Removed 2 rows containing missing values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 9 rows containing missing values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing non-finite values (stat_density).

## Warning: Removed 9 rows containing missing values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 2 rows containing missing values (stat_smooth).
## Warning: Removed 2 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing non-finite values (stat_density).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 9 rows containing missing values (stat_smooth).
## Warning: Removed 9 rows containing missing values (geom_point).

## Warning: Removed 11 rows containing missing values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## Warning: Removed 8 rows containing missing values (stat_smooth).
## Warning: Removed 8 rows containing missing values (geom_point).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 8 rows containing non-finite values (stat_density).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## Warning: Removed 11 rows containing missing values (stat_smooth).
## Warning: Removed 11 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 1 rows containing non-finite values (stat_density).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## Warning: Removed 10 rows containing missing values (stat_smooth).
## Warning: Removed 10 rows containing missing values (geom_point).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.
## Warning: Removed 10 rows containing non-finite values (stat_density).

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## stat_bin: binwidth defaulted to range/30. Use 'binwidth = x' to adjust this.

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

## Warning: Removed 8 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 1 rows containing non-finite values (stat_boxplot).

## Warning: Removed 10 rows containing non-finite values (stat_boxplot).

## Warning: Removed 10 rows containing non-finite values (stat_boxplot).

## Warning: Removed 10 rows containing non-finite values (stat_boxplot).

Introducción

Como el dinero no alcanza, tomas otro trabajo rápido para una ONG. Quieren predecir la concentración de algas en ríos de la región. Tomaron datos durante un año.

Cada observación es el resultado de agregar varias muestras de agua recolectadas en el mismo río durante un periodo de 3 meses en la misma estación del año.

Variables

El set contiene 18 atributos en el siguiente orden:

campo atributo valores tipo
1 temporada spring, summer, autumn, winter categórica
2 tamaño del río small, medium, large categórica
3 velocidad low, medium, high categórica
4-11 concentraciones químicas Reales positivos continua
12-18 distribución de diferentes tipos de algas Reales positivos continua

Los NA están codificados como XXXXXXX.

Estructura General

ds
##     temporada tamano velocidad  mxPH  mnO2      Cl    NO3       NO4
## 1      winter  small    medium 8.000  9.80  60.800  6.238   578.000
## 2      spring  small    medium 8.350  8.00  57.750  1.288   370.000
## 3      autumn  small    medium 8.100 11.40  40.020  5.330   346.667
## 4      spring  small    medium 8.070  4.80  77.364  2.302    98.182
## 5      autumn  small    medium 8.060  9.00  55.350 10.416   233.700
## 6      winter  small      high 8.250 13.10  65.750  9.248   430.000
## 7      summer  small      high 8.150 10.30  73.250  1.535   110.000
## 8      autumn  small      high 8.050 10.60  59.067  4.990   205.667
## 9      winter  small    medium 8.700  3.40  21.950  0.886   102.750
## 10     winter  small      high 7.930  9.90   8.000  1.390     5.800
## 11     spring  small      high 7.700 10.20   8.000  1.527    21.571
## 12     summer  small      high 7.450 11.70   8.690  1.588    18.429
## 13     winter  small      high 7.740  9.60   5.000  1.223    27.286
## 14     summer  small      high 7.720 11.80   6.300  1.470     8.000
## 15     winter  small      high 7.900  9.60   3.000  1.448    46.200
## 16     autumn  small      high 7.550 11.50   4.700  1.320    14.750
## 17     winter  small      high 7.780 12.00   7.000  1.420    34.333
## 18     spring  small      high 7.610  9.80   7.000  1.443    31.333
## 19     summer  small      high 7.350 10.40   7.000  1.718    49.000
## 20     spring  small    medium 7.790  3.20  64.000  2.822  8777.600
## 21     winter  small    medium 7.830 10.70  88.000  4.825  1729.000
## 22     spring  small      high 7.200  9.20   0.800  0.642    81.000
## 23     autumn  small      high 7.750 10.30  32.920  2.942    42.000
## 24     winter  small      high 7.620  8.50  11.867  1.715   208.333
## 25     spring  small      high 7.840  9.40  10.975  1.510    12.500
## 26     summer  small      high 7.770 10.70  12.536  3.976    58.500
## 27     winter  small      high 7.090  8.40  10.500  1.572    28.000
## 28     autumn  small      high 6.800 11.10   9.000  0.630    20.000
## 29     winter  small      high 8.000  9.80  16.000  0.730    20.000
## 30     spring  small      high 7.200 11.30   9.000  0.230   120.000
## 31     autumn  small      high 7.400 12.50  13.000  3.330    60.000
## 32     winter  small      high 8.100 10.30  26.000  3.780    60.000
## 33     summer  small      high 7.800 11.30  20.083  3.020    49.500
## 34     autumn  small    medium 8.400  9.90  34.500  2.818  3515.000
## 35     winter  small    medium 8.270  7.80  29.200  0.050  6400.000
## 36     summer  small    medium 8.660  8.40  30.523  3.444  1911.000
## 37     winter  small      high 8.300 10.90   1.170  0.735    13.500
## 38     spring  small      high 8.000    NA   1.450  0.810    10.000
## 39     winter  small    medium 8.300  8.90  20.625  3.414   228.750
## 40     spring  small    medium 8.100 10.50  22.286  4.071   178.570
## 41     winter  small    medium 8.000  5.50  77.000  6.096   122.850
## 42     summer  small    medium 8.150  7.10  54.190  3.829   647.570
## 43     winter  small      high 8.300  7.70  50.000  8.543    76.000
## 44     spring  small      high 8.300  8.80  54.143  7.830    51.429
## 45     winter  small      high 8.400 13.40  69.750  4.555    37.500
## 46     spring  small      high 8.300 12.50  87.000  4.870    22.500
## 47     autumn  small      high 8.000 12.10  66.300  4.535    39.000
## 48     winter  small       low    NA 12.60   9.000  0.230    10.000
## 49     spring  small    medium 7.600  9.60  15.000  3.020    40.000
## 50     autumn  small    medium 7.290 11.21  17.750  3.070    35.000
## 51     winter  small    medium 7.600 10.20  32.300  4.508   192.500
## 52     summer  small    medium 8.000  7.90  27.233  1.651    28.333
## 53     winter  small      high 7.900 11.00   6.167  1.172    18.333
## 54     spring  small      high 7.900  9.00   5.273  0.910    33.636
## 55     winter  small      high 6.600 10.80      NA  3.245    10.000
## 56     spring  small    medium 5.600 11.80      NA  2.220     5.000
## 57     autumn  small    medium 5.700 10.80      NA  2.550    10.000
## 58     spring  small      high 6.600  9.50      NA  1.320    20.000
## 59     summer  small      high 6.600 10.80      NA  2.640    10.000
## 60     autumn  small    medium 6.600 11.30      NA  4.170    10.000
## 61     spring  small    medium 6.500 10.40      NA  5.970    10.000
## 63     autumn  small      high 7.830 11.70   4.083  1.328    18.000
## 64     spring  small      high 7.570 10.80   4.575  1.203    27.500
## 65     summer  small      high 7.190 11.70   4.326  1.474   160.000
## 66     winter  small      high 7.440 10.10   2.933  0.770    15.000
## 67     spring  small      high 7.140  9.80   3.275  0.923    15.000
## 68     summer  small      high 7.000 12.10   3.136  1.208    16.200
## 69     winter  small    medium 7.500  1.50  32.400  0.921  1386.250
## 70     spring  small    medium 7.500  1.80  29.775  1.051  2082.850
## 71     summer  small    medium 7.800  7.10  32.540  1.720  2167.370
## 72     autumn medium    medium 8.500  8.10  38.125  3.850   225.000
## 73     summer medium    medium 7.925 10.20  34.037  9.080   109.000
## 74     winter medium    medium 8.100  8.10 136.000  3.773   245.000
## 75     spring medium    medium 8.200  6.80 129.375  3.316   271.250
## 76     spring medium      high 9.100  9.40  35.750  5.164    32.500
## 77     autumn medium    medium 8.100  9.80  29.500  1.287   224.286
## 78     winter medium    medium 8.000  5.90  27.400  0.735   133.636
## 79     spring medium    medium 8.000  3.30  26.760  0.658   165.000
## 80     winter medium      high 7.500  9.20  11.000  3.310   101.000
## 81     spring medium      high 7.400  9.80  11.000  3.235   255.000
## 82     autumn medium      high 7.300 11.70  10.400  4.930   130.000
## 83     winter medium      high 7.400  8.90  13.500  5.442   123.333
## 84     summer medium      high 7.400 11.17  12.146  6.188    89.600
## 85     autumn medium    medium 7.500 10.80  31.000  4.408   737.500
## 86     winter medium    medium 7.600  6.00  53.000  3.734   914.000
## 87     summer medium    medium 7.400 10.77  36.248  3.730   429.200
## 88     winter medium    medium 7.800  3.60  48.667  4.030  5738.330
## 89     summer medium    medium 7.600  9.70  53.102  7.160  4073.330
## 90     winter medium    medium 8.500  8.60 125.600  3.778   124.167
## 91     spring medium    medium 8.700  9.40 173.750  3.318   101.250
## 92     summer medium    medium 8.100 10.70  94.405  4.698   153.000
## 93     winter medium      high 8.800  8.50  53.333  5.132    96.667
## 94     spring medium      high 7.800 10.50  70.000  2.443    98.333
## 95     summer medium      high 7.900 11.80  63.510  4.940   137.000
## 96     autumn medium       low 8.500 10.50  56.717  0.330   215.714
## 97     winter medium       low 9.100  5.40  61.050  0.308   105.556
## 98     spring medium       low 8.900  4.50  57.750  0.267   155.000
## 99     winter medium      high 7.900  6.30 101.875  3.978   153.750
## 100    summer medium      high 7.800  8.20  85.982  6.200   421.667
## 101    winter medium    medium 7.700  7.10  63.625  3.140   122.500
## 102    spring medium    medium 7.800  6.50  82.111  2.603   215.556
## 103    winter medium       low 7.700  5.30  65.333  2.899   371.111
## 104    summer medium       low 7.500  8.80  58.331  8.688   758.750
## 105    autumn medium       low 7.600 10.00  49.625  5.456   308.750
## 106    winter medium       low 8.700  7.40  47.778  2.316    38.111
## 107    summer medium       low 7.700 11.10  47.229  8.759   239.000
## 108    autumn medium      high 8.300 11.10  41.500  4.665   931.833
## 109    winter medium      high 8.430  6.00  40.167  2.670   723.667
## 110    summer medium      high 8.160 11.10  32.056  5.694   461.875
## 111    winter medium      high 8.700  9.80   5.889  1.534    51.111
## 112    spring medium      high 8.200 11.30   7.250  1.875    25.000
## 113    summer medium      high 8.500 11.80   7.838  1.732   206.538
## 114    spring medium    medium 7.800  6.00  53.425  0.381   118.571
## 115    summer medium    medium 8.000  9.70  57.848  0.461   217.750
## 116    winter medium      high 9.700 10.80   0.222  0.406    10.000
## 117    summer medium      high 8.600 11.62   1.549  0.445    25.833
## 118    autumn medium    medium 8.300 11.60   5.830  0.701    12.727
## 119    spring medium       low 8.400  5.30  74.667  3.900   131.667
## 120    summer medium       low 8.200  6.60 131.400  4.188    92.000
## 121    winter medium    medium 8.200  9.40  45.273  7.195   345.455
## 122    spring medium    medium 8.100  7.10  42.636  5.078    56.364
## 123    summer medium    medium 8.100  9.00  48.429  6.640   128.571
## 124    winter medium      high 7.400 10.70  11.818  2.163   170.909
## 125    spring medium      high 8.300  9.70  10.556  1.921    65.556
## 126    summer medium      high 8.600 10.70  12.000  2.231    43.750
## 127    winter medium    medium 9.100 11.60  31.091  5.099   246.364
## 128    spring medium    medium 9.000  6.90  28.333  2.954    76.667
## 129    summer medium    medium 8.300 10.00  30.125  3.726   102.500
## 130    winter medium      high 8.500 10.10  10.936  1.335   236.000
## 131    spring medium      high 8.300  7.70  10.078  1.212   103.333
## 132    summer medium      high 7.300 10.50  11.088  1.374    92.375
## 133    winter medium    medium 7.900  9.80 194.750  6.513  3466.660
## 134    spring medium    medium 7.900  8.30 391.500  6.045   380.000
## 135    autumn medium    medium 8.000 11.90 130.670  6.540   196.000
## 136    spring medium    medium 8.000  9.20  39.000  4.860   120.000
## 137    autumn medium    medium 8.100 11.70  35.660  5.130    46.500
## 138    winter medium       low 8.430  9.90  37.600  0.826   124.000
## 139    summer medium       low 8.100  6.20  39.000  0.673   112.857
## 140    winter medium    medium 7.900 11.20  49.900  9.773   505.000
## 141    summer medium    medium 8.100  6.20  51.113  5.099   175.000
## 142    spring medium      high 7.800  9.50   8.300  1.670    34.000
## 143    autumn medium      high 7.900 10.50  10.207  2.304   132.250
## 144    winter medium       low 8.000  4.50  79.077  8.984   920.000
## 145    spring medium       low 7.600  6.30  81.333  9.715   196.667
## 146    autumn medium       low 7.800  6.50  64.093  7.740  1990.160
## 147    winter medium      high 8.220  8.10  41.250  1.415   172.500
## 148    autumn medium      high 8.300  9.90  40.226  1.587   235.000
## 149    winter medium      high 8.470  9.00  46.167  2.102    84.667
## 150    spring medium      high 8.400  4.90  47.000  0.536    91.833
## 151    autumn medium      high 8.870 11.00  41.163  2.273    54.750
## 152    summer medium      high 7.700  4.40  53.000  2.310    90.000
## 153    autumn medium      high 7.300 11.80  44.205 45.650 24064.000
## 154    spring medium    medium 7.900  6.00 127.833  2.680   176.667
## 155    autumn medium    medium 7.800 10.53 100.830  5.410   486.500
## 156    spring  large       low 7.800  3.20  94.000  4.908  1131.660
## 157    summer  large       low 7.600  4.90  69.000  3.685  1495.000
## 158    spring  large       low 8.600  3.60  50.000  0.376   134.000
## 159    autumn  large       low 8.400 10.60  19.220  1.655    96.833
## 160    winter  large       low 8.300 11.50  26.000  1.870    62.500
## 161    spring  large       low 9.000  5.80      NA  0.900   142.000
## 162    spring  large       low 9.500  5.70  44.000  0.102   146.667
## 163    summer  large       low 8.800  8.80  43.000  0.130   103.333
## 164    autumn  large       low 8.840 12.90  43.090  0.846    52.200
## 165    winter  large      high 7.300  9.90  16.000  4.820   101.667
## 166    autumn  large      high 7.400 10.68  22.350  5.414   244.600
## 167    spring  large       low 9.100  4.30  82.857  0.860   137.273
## 168    autumn  large       low 8.530 11.10  63.292  1.726   227.600
## 169    winter  large       low 8.560  8.70  43.970  4.053   643.000
## 170    autumn  large       low 8.060  8.30  38.902  3.678   627.273
## 171    winter  large    medium 8.240  6.10  95.367  3.561  1168.000
## 172    summer  large    medium 7.910  6.20 151.833  3.923  1081.660
## 173    winter  large    medium 8.210  9.30 104.818  3.908   124.364
## 174    spring  large    medium 8.500  7.30  71.444  2.512    66.667
## 175    spring  large    medium 8.600 10.60 208.364  4.459   197.909
## 176    winter  large    medium 9.060  6.35 187.183  3.351    54.778
## 177    autumn  large      high 8.700 10.70   4.545  0.941    32.727
## 178    spring  large      high 8.100 10.70   3.500  1.013    12.500
## 179    summer  large      high 8.400 10.29   5.326  0.996    53.846
## 180    spring  large    medium 8.600 10.10   2.111  0.663    11.111
## 181    summer  large    medium 8.200  9.50   2.200  0.672    10.000
## 182    winter  large    medium 8.500 10.50   2.750  0.758    10.500
## 183    summer  large    medium 8.300 10.00   3.860  0.866    32.000
## 184    winter  large      high 8.000 10.90   9.055  0.825    40.000
## 185    summer  large      high 8.100 10.20   7.613  0.699    32.500
## 186    winter  large       low 8.700 10.80  39.109  6.225   161.818
## 187    winter  large       low 8.700 11.70  22.455  3.765    88.182
## 188    summer  large       low 8.400  8.20  23.250  2.805    43.750
## 189    autumn  large       low 8.550 11.00  22.320  3.140    82.100
## 190    spring  large    medium 8.500  7.60  12.778  1.873    17.778
## 191    autumn  large    medium 8.700 11.40  15.541  2.323   103.000
## 192    winter  large    medium 8.400 10.50  12.182  1.519    65.455
## 193    spring  large    medium 8.200  8.20   7.333  1.003    37.778
## 194    autumn  large    medium 8.580 11.10  23.825  3.617    72.600
## 195    summer  large    medium 8.500  7.90  12.444  2.586    96.667
## 196    autumn  large    medium 8.400  8.40  17.375  3.833    83.750
## 197    spring  large    medium 8.300 10.60  14.320  3.200   125.333
## 198    autumn  large    medium 8.200  7.00 139.989  2.978    60.110
## 200    summer  large    medium 8.500  6.70  82.852  2.800    27.069
##        oPO4     PO4    Chla   a1   a2   a3   a4   a5   a6   a7
## 1   105.000 170.000  50.000  0.0  0.0  0.0  0.0 34.2  8.3  0.0
## 2   428.750 558.750   1.300  1.4  7.6  4.8  1.9  6.7  0.0  2.1
## 3   125.667 187.057  15.600  3.3 53.6  1.9  0.0  0.0  0.0  9.7
## 4    61.182 138.700   1.400  3.1 41.0 18.9  0.0  1.4  0.0  1.4
## 5    58.222  97.580  10.500  9.2  2.9  7.5  0.0  7.5  4.1  1.0
## 6    18.250  56.667  28.400 15.1 14.6  1.4  0.0 22.5 12.6  2.9
## 7    61.250 111.750   3.200  2.4  1.2  3.2  3.9  5.8  6.8  0.0
## 8    44.667  77.434   6.900 18.2  1.6  0.0  0.0  5.5  8.7  0.0
## 9    36.300  71.000   5.544 25.4  5.4  2.5  0.0  0.0  0.0  0.0
## 10   27.250  46.600   0.800 17.0  0.0  0.0  2.9  0.0  0.0  1.7
## 11   12.750  20.750   0.800 16.6  0.0  0.0  0.0  1.2  0.0  6.0
## 12   10.667  19.000   0.600 32.1  0.0  0.0  0.0  0.0  0.0  1.5
## 13   12.000  17.000  41.000 43.5  0.0  2.1  0.0  1.2  0.0  2.1
## 14   16.000  15.000   0.500 31.1  1.0  3.4  0.0  1.9  0.0  4.1
## 15   13.000  61.600   0.300 52.2  5.0  7.8  0.0  4.0  0.0  0.0
## 16    4.250  98.250   1.100 69.9  0.0  1.7  0.0  0.0  0.0  0.0
## 17   18.667  50.000   1.100 46.2  0.0  0.0  1.2  0.0  0.0  0.0
## 18   20.000  57.833   0.400 31.8  0.0  3.1  4.8  7.7  1.4  7.2
## 19   41.500  61.500   0.800 50.6  0.0  9.9  4.3  3.6  8.2  2.2
## 20  564.600 771.600   4.500  0.0  0.0  0.0 44.6  0.0  0.0  1.4
## 21  467.500 586.000  16.000  0.0  0.0  0.0  6.8  6.1  0.0  0.0
## 22   15.600  18.000   0.500 15.5  0.0  0.0  2.3  0.0  0.0  0.0
## 23   16.000  40.000   7.600 23.2  0.0  0.0  0.0 27.6 11.1  0.0
## 24    3.000  27.500   1.700 74.2  0.0  0.0  3.7  0.0  0.0  0.0
## 25    3.000  11.500   1.500 13.0  8.6  1.2  3.5  1.2  1.6  1.9
## 26    9.000  44.136   3.000  4.1  0.0  0.0  0.0  9.2 10.1  0.0
## 27    4.000  13.600   0.500 29.7  0.0  0.0  4.9  0.0  0.0  0.0
## 28    4.000      NA   2.700 30.3  1.9  0.0  0.0  2.1  1.4  2.1
## 29   26.000  45.000   0.800 17.1  0.0 19.6  0.0  0.0  0.0  2.5
## 30   12.000  19.000   0.500 33.9  1.0 14.6  0.0  0.0  0.0  0.0
## 31   72.000 142.000   4.900  3.4 16.0  1.2  0.0 15.3 15.8  0.0
## 32  246.000 304.000   2.800  6.9 17.1 20.2  0.0  4.0  0.0  2.9
## 33   53.000 130.750   5.800  0.0  8.0  1.9  0.0 11.2 42.7  1.2
## 34   20.000  47.000   2.300 13.6  9.1  0.0  0.0  1.4  0.0  0.0
## 35    7.400  23.000   0.900  5.3 40.7  3.3  0.0  0.0  0.0  1.9
## 36   58.875  84.460   3.600 18.3 12.4  1.0  0.0  0.0  0.0  1.0
## 37    1.625   3.000   0.200 66.0  0.0  0.0  0.0  0.0  0.0  0.0
## 38    2.500   3.000   0.300 75.8  0.0  0.0  0.0  0.0  0.0  0.0
## 39  196.620 253.250  12.320  2.0 38.5  4.1  2.2  0.0  0.0 10.2
## 40  182.420 255.280   8.957  2.2  2.7  1.0  3.7  2.7  0.0  0.0
## 41  143.710 296.000   3.700  0.0  5.9 10.6  1.7  0.0  0.0  7.1
## 42   59.429 175.046  13.200  0.0  0.0  0.0  5.7 11.3 17.0  1.6
## 43  264.900 344.600  22.500  0.0 40.9  7.5  0.0  2.4  1.5  0.0
## 44  276.850 326.857  11.840  4.1  3.1  0.0  0.0 19.7 17.0  0.0
## 45   10.000  40.667   3.900 51.8  4.1  0.0  0.0  3.1  5.5  0.0
## 46   27.000  43.500   3.300 29.5  1.0  2.7  3.2  2.9  9.6  0.0
## 47   16.000  39.000   0.800 54.4  3.4  1.2  0.0 18.7  2.0  0.0
## 48    5.000   6.000   1.100 35.5  0.0  0.0  0.0  0.0  0.0  0.0
## 49   27.000 121.000   2.800 89.8  0.0  0.0  0.0  0.0  0.0  0.0
## 50   13.000  20.812  12.100 24.8  7.4  0.0  2.5 10.6 17.1  3.2
## 51   12.750  49.333   7.900  0.0  0.0  0.0  4.6  1.2  0.0  3.9
## 52    7.300  22.900   4.500 39.1  0.0  1.2  2.2  5.4  1.5  3.2
## 53    7.750  11.800   0.500 81.9  0.0  0.0  0.0  0.0  0.0  0.0
## 54    9.000  11.818   0.800 54.0  0.0  0.0  2.4  0.0  0.0  0.0
## 55    1.000   6.500      NA 24.3  0.0  0.0  0.0  0.0  0.0  0.0
## 56    1.000   1.000      NA 82.7  0.0  0.0  0.0  0.0  0.0  0.0
## 57    1.000   4.000      NA 16.8  4.6  3.9 11.5  0.0  0.0  0.0
## 58    1.000   6.000      NA 46.8  0.0  0.0 28.8  0.0  0.0  0.0
## 59    2.000  11.000      NA 46.9  0.0  0.0 13.4  0.0  0.0  0.0
## 60    1.000   6.000      NA 47.1  0.0  0.0  0.0  0.0  1.2  0.0
## 61    2.000  14.000      NA 66.9  0.0  0.0  0.0  0.0  0.0  0.0
## 63    3.333   6.667      NA 14.4  0.0  0.0  0.0  0.0  0.0  0.0
## 64    2.000   6.750   1.000 20.3  4.3  5.5  0.0  0.0  0.0  1.4
## 65    2.500   7.200   0.300 15.8  1.7  7.8  0.0  0.0  2.4  1.4
## 66    1.333   6.000   0.600 55.5  0.0  1.7  1.4  0.0  0.0  0.0
## 67    1.250  10.750   2.500 10.3  0.0 42.8  2.2  0.0  0.0  0.0
## 68    1.800   2.500   0.500 64.2  0.0  3.0  0.0  0.0  0.0  0.0
## 69  220.750 351.600  10.000  0.0  0.0  1.5  7.6  0.0  0.0  6.1
## 70  209.857 313.600   1.000  1.9  4.9  2.6  3.0  0.0  0.0  1.9
## 71  151.125 279.066  13.100 25.5  3.9  1.0 11.0  0.0  0.0 12.5
## 72   45.000 152.333   5.200 11.3  1.7  2.0  2.2 13.3 10.6  0.0
## 73   55.000  58.623  11.600  4.4  4.0  3.3  0.0 11.7 21.4  1.2
## 74  136.750 249.250  20.870  1.9  5.8 24.8  4.6  9.5  5.1  1.2
## 75  100.000 233.500  13.000  1.6  8.0 17.6  3.7 11.5  7.0  0.0
## 76   85.500 215.500  18.370  2.2  9.6  5.0  1.0  8.6  7.9  2.2
## 77   25.167 102.333   3.600 64.9  1.0  0.0  1.0  2.9  1.4  1.0
## 78   36.000 105.727   3.000 15.1  7.3 23.2  3.4  4.1  0.0  0.0
## 79   37.375 111.375   3.000 14.4  0.0 11.8 11.3  5.5  0.0  0.0
## 80   26.600 108.000   1.300  6.7  0.0  5.4  3.4  4.9  6.9 10.8
## 81   38.750  56.667   2.000 10.8  0.0  0.0  4.6  6.5  2.2  1.4
## 82   10.800  60.000   4.300  1.2  0.0  1.7  0.0  7.5 17.7 14.4
## 83   27.667 104.000  21.000 12.6  4.3 21.9  1.0  2.4  3.3 22.1
## 84   32.000  69.930   3.100 14.7  4.1  1.0  0.0  7.7  8.5 31.2
## 85  111.250 214.000   2.900  3.3  0.0  0.0  5.0  1.9  6.2 25.6
## 86  137.600 254.600   4.300  0.0  0.0  0.0  4.6  9.0 13.1 30.1
## 87   57.600 169.001   3.200  2.8  0.0  0.0  2.6  5.2 13.2 16.7
## 88  412.333 607.167   4.300  0.0  0.0  2.6  2.4  5.0  0.0  2.4
## 89  282.167 624.733   6.800  0.0  0.0  0.0  1.0 35.6  9.9  0.0
## 90  197.833 303.333  40.000  0.0 15.2  8.8  0.0  8.6  5.1  2.7
## 91  267.750 391.750   3.500  0.0  5.5  3.3  0.0 20.8 12.4  0.0
## 92  191.750 265.250   7.300  0.0  2.1  1.6  0.0 20.8 32.9  0.0
## 93  120.500 232.833  31.000  1.2  5.6  6.3  1.7  1.2  0.0  1.0
## 94  144.667 244.000   9.000  0.0  3.1  3.5  1.6  8.2  9.9  0.0
## 95  159.500 218.000   6.500  0.0  5.2  0.0  0.0 28.8 20.4  1.0
## 96   23.000 138.500  20.829  5.7  0.0  0.0  4.4 12.4  8.3  7.8
## 97  104.222 239.000  72.478  3.6 31.9  2.4  0.0  0.0  0.0  2.2
## 98   97.333 235.667  98.817  1.2 16.2  0.0  0.0  0.0  0.0  1.0
## 99   51.750 205.875   2.000  4.0  2.1 35.1  6.8  7.3  0.0  0.0
## 100  31.333 211.667  21.900  5.9  3.4  1.0  1.2 17.8 49.4  1.0
## 101  28.625 186.500  30.000 16.5  2.1 19.5  3.5  5.3  1.2  3.2
## 102  12.889 154.125   5.200  7.0  0.0 13.5  4.3  8.7  0.0  4.3
## 103  51.111 183.667  17.200 58.7  0.0 11.5  6.6  0.0  0.0  0.0
## 104 104.500 292.625   3.000  8.7  0.0  3.0  5.3  9.4 33.2  0.0
## 105  38.625 285.714  75.000 17.0 21.6  1.6  1.4 10.2  3.6  1.1
## 106  24.667 201.778   3.000 12.3  5.4  1.9  0.0  1.4  0.0  1.9
## 107  54.000 275.143  65.700  8.8 19.6  4.7  0.0  0.0  0.0  2.7
## 108  39.000 124.200  13.100 23.7 13.7  0.0  1.7  6.4  2.6  0.0
## 109  60.833 141.833  25.000  0.0  6.4  7.3 12.7  0.0  0.0  4.2
## 110  71.000 132.546  15.000  3.6 38.8  0.0  0.0  1.2  0.0  2.4
## 111   9.667  17.333   1.000 64.3  1.5  8.0  0.0  0.0  0.0  0.0
## 112   6.500  26.000   0.300 46.6  0.0  2.5  0.0  0.0  0.0  0.0
## 113   8.692  16.662   2.100 24.0  0.0  1.0  0.0  0.0  0.0  0.0
## 114  37.857 102.571   1.200  3.7  1.4  1.1  2.1  3.2  6.4  0.0
## 115  37.000  86.997   3.000 18.1 14.5  0.0  0.0 11.5 22.3  0.0
## 116  22.444  10.111      NA 41.0  1.5  0.0  0.0  0.0  0.0  0.0
## 117  16.833  18.293   1.400 43.7  0.0  1.2  0.0  0.0  4.7  0.0
## 118   3.545  13.200   3.200 86.6  0.0  0.0  0.0  0.0  0.0  0.0
## 119 261.600 432.909  24.917  1.9 12.7 25.9  0.0  0.0  0.0  6.8
## 120 238.200 320.400   6.800  1.2  1.9 22.9  0.0  8.1  0.0  0.0
## 121 144.000 287.000   9.882  1.4 18.4  0.0  0.0 20.0 29.5  0.0
## 122 166.727 262.727  17.200  1.6  8.9  6.6  0.0  9.2  1.6  1.4
## 123 181.000 222.286   6.429  3.3 11.6  7.0  0.0 17.9  4.7  0.0
## 124  36.909 122.000   5.555 14.6  0.0  0.0  1.9 22.1 12.7  1.4
## 125  61.556 127.222   5.233  1.7  0.0 10.3  2.6  8.9  6.7  0.0
## 126  62.625  89.625   2.150  3.3  0.0  0.0  1.9 34.3  7.1  6.0
## 127  55.000 284.000  88.255  0.0 36.6  4.1  0.0  1.2 16.7  6.1
## 128 102.333 277.333 110.456  0.0 16.4 10.1  0.0  0.0  0.0  6.6
## 129  75.875 177.625  50.225  1.5 32.8  1.0  4.1  0.0 15.8  2.4
## 130  34.636  72.900  11.100  4.2  0.0  1.4  1.9 16.2  0.0  1.4
## 131  48.667  82.444   2.000  4.1  0.0 25.3  2.1  8.0  0.0 18.6
## 132  48.625  66.750   3.300  1.2  0.0  2.3  0.0 44.4  7.5  1.9
## 133  23.000 173.750  15.300  0.0  0.0  1.0  0.0  9.0 64.6  0.0
## 134 173.000 317.000   5.500  2.4  1.7  4.2  8.3  1.7  0.0  2.4
## 135  75.000  84.000   4.500  7.8  8.7  2.1  0.0 14.9 22.9  2.4
## 136 187.000 213.000   2.000 10.3 26.5  6.1  0.0  5.6  1.5  2.2
## 137  49.000  88.500   2.500  1.5 72.6  0.0  0.0  3.4  6.8  3.4
## 138  32.500 115.000  11.700  9.2  2.9  2.0  1.3  2.5  0.0  0.0
## 139  60.000  98.143   2.000 28.1  0.0  0.0  4.0  1.2  0.0  0.0
## 140  67.500 143.750   5.450  2.1  2.6  0.0  0.0 15.0 15.7  0.0
## 141 132.500 197.143   6.400  1.4 15.7  1.4  0.0  3.5  0.0  1.6
## 142  16.800  35.200   1.000 19.0  0.0 22.0  5.0  1.1  5.4  0.0
## 143  10.583  23.485   2.000 42.5  0.0  2.2  1.0  0.0  0.0  0.0
## 144  70.000 200.231  19.400  2.5  1.4  1.4  6.2  4.1  1.8  3.9
## 145  77.333 147.833   3.000  4.4 11.2  6.8  0.0  1.0  0.0 31.6
## 146  47.500 276.000   8.100  6.5  4.1  0.0  7.7  9.9 18.2  7.0
## 147  46.667 123.333  30.400 39.7 12.7  0.0  1.1  2.7  0.0  1.6
## 148  33.800  75.207  23.800 32.8 28.0  2.0  3.5  1.0  0.0  1.5
## 149  48.000 116.200   7.300 12.2 16.0  1.0  1.4  1.9  1.2  0.0
## 150 109.000 188.667  32.000  1.9 25.4 21.7  0.0  0.0  1.0  0.0
## 151  39.000  72.696  22.700  0.0  5.6  1.2  0.0  8.0  2.7  0.0
## 152  22.200 116.200  16.000  0.0  0.0  0.0  1.2  5.7 32.1  0.0
## 153  44.000  34.000  53.100  2.2  0.0  0.0  1.2  5.9 77.6  0.0
## 154  27.500  76.333   2.100  3.4 21.5 14.0  1.8  3.9  0.0  0.0
## 155  24.000  58.374  27.500  2.8  1.9  0.0  1.2 19.0  4.5  0.0
## 156 175.667 361.000  28.567 24.8 10.4  0.0  6.9  0.0  0.0  2.7
## 157 234.500 236.000  22.500 32.5 12.0  0.0  5.0  0.0  0.0  1.9
## 158  54.100 125.800  26.800  0.0 28.0  0.0  0.0  0.0  0.0 15.1
## 159  20.667  54.916  20.600  0.0 11.3  1.8  0.0  2.5  0.0  1.4
## 160  30.750  75.333  34.750  0.0 20.1  0.0  0.0  0.0  0.0  0.0
## 161 102.000 186.000  68.050  1.7 20.6  1.5  2.2  0.0  0.0  0.0
## 162 151.333 252.500  93.683 12.3 21.7  3.9  0.0  0.0  0.0  3.9
## 163 180.667 269.667  92.667  7.2 28.2  0.0  0.0  0.0  0.0  3.3
## 164   8.600  46.438  81.540  3.4 21.5  0.0  0.0  0.0  0.0  2.7
## 165  14.667  85.000   2.000  0.0  0.0  0.0  2.4  0.0 17.8  3.6
## 166  66.400 171.272   3.800  1.1  0.0  1.4  0.0  6.6 42.1  5.2
## 167 102.364 232.900  54.367  0.0  6.0  2.9  0.0  0.0  0.0  2.9
## 168  84.300 146.452  21.220  1.4 14.7  2.5  0.0  0.0  0.0  2.0
## 169 221.900 246.667  14.700 12.5  2.1  0.0  1.2  6.4  4.5  1.7
## 170 205.636 219.909   6.209  0.0  0.0  0.0  0.0  8.6 52.5  0.0
## 171 236.400 272.222  20.578  2.5 13.2  0.0  2.0  7.4 17.2  0.0
## 172 346.167 388.167   5.083  1.7 12.0  4.9  2.7  0.0  5.9  1.7
## 173  82.222 167.900   5.609  1.4  4.6 10.8  2.2  5.5 42.4  0.0
## 174  64.389 137.778   9.384  0.0  3.8 16.0  4.0  0.0  0.0  3.3
## 175  87.333 194.100  27.618  0.0  1.2  0.0  0.0 11.3 11.5  0.0
## 176 159.167 221.278  20.800  0.0 21.1  3.7  0.0  0.0  0.0  1.9
## 177  16.000  21.300   1.100 39.7  0.0 12.9  0.0  0.0  0.0  0.0
## 178  12.750  11.000   0.600 37.3  9.7 13.6  0.0  2.2  0.0  1.2
## 179   7.667  14.354   0.800 52.4  7.5  9.4  0.0  1.4  1.9  0.0
## 180   3.222   7.000   1.300 48.3  2.0  0.0  0.0  0.0  0.0  0.0
## 181   3.800   6.200   0.800 50.4  3.8  0.0  0.0  0.0  0.0  0.0
## 182   4.000   7.654   4.000 56.8  5.0  0.0  0.0  0.0  0.0  0.0
## 183   6.000  16.000   2.860 17.3  6.7 19.7  0.0  0.0  0.0  0.0
## 184  21.083  56.091      NA 16.8 19.6  4.0  0.0  0.0  0.0  0.0
## 185  26.625  52.875   2.000 18.1  1.7  2.0  0.0  1.7  5.9  0.0
## 186 104.727 228.364  46.075  1.1  3.9  2.1  0.0  3.9  4.6  2.3
## 187  41.300  85.400  17.491  0.0  4.7  0.0  0.0  2.6  2.6  0.0
## 188  51.125  87.125  14.775  0.0 12.0  1.7  0.0  2.7  0.0  0.0
## 189  45.900 101.455  18.330  1.7  7.0  1.2  0.0  4.8  3.1  0.0
## 190  50.889 127.000  24.556  0.0  0.0 10.2  1.7  1.2  0.0  5.5
## 191  34.500  81.558   5.620  7.6  0.0  1.2  0.0 15.9 31.8  5.9
## 192  19.727  50.455   8.155  2.9  4.6  1.0  0.0  6.6 16.6  0.0
## 193  19.111 120.889   5.111  2.2 12.7  8.8  0.0  0.0  0.0  1.2
## 194  51.111  91.111  22.900  3.8 22.0  2.9  0.0  3.1  5.5  0.0
## 195  19.111  61.444   6.167 18.9 13.2  5.0  0.0  6.1  0.0  0.0
## 196  53.625  79.750   2.338 12.7 21.7  5.6  0.0  1.0  0.0  0.0
## 197  35.333  75.904   4.667 18.0  7.0  1.7  0.0  4.8 10.3  1.0
## 198  78.333 140.220  31.738  0.0 15.9  2.4  1.0  0.0  0.0  0.0
## 200  64.000 140.517  18.300  2.4 10.5  9.0  7.8  0.0  0.0  5.8

Tamaño

dim(ds)
## [1] 198  18

Columnas

names(ds)
##  [1] "temporada" "tamano"    "velocidad" "mxPH"      "mnO2"     
##  [6] "Cl"        "NO3"       "NO4"       "oPO4"      "PO4"      
## [11] "Chla"      "a1"        "a2"        "a3"        "a4"       
## [16] "a5"        "a6"        "a7"

Estructura

str(ds)
## 'data.frame':    198 obs. of  18 variables:
##  $ temporada: Factor w/ 4 levels "autumn","spring",..: 4 2 1 2 1 4 3 1 4 4 ...
##  $ tamano   : Factor w/ 3 levels "large","medium",..: 3 3 3 3 3 3 3 3 3 3 ...
##  $ velocidad: Factor w/ 3 levels "high","low","medium": 3 3 3 3 3 1 1 1 3 1 ...
##  $ mxPH     : num  8 8.35 8.1 8.07 8.06 8.25 8.15 8.05 8.7 7.93 ...
##  $ mnO2     : num  9.8 8 11.4 4.8 9 13.1 10.3 10.6 3.4 9.9 ...
##  $ Cl       : num  60.8 57.8 40 77.4 55.4 ...
##  $ NO3      : num  6.24 1.29 5.33 2.3 10.42 ...
##  $ NO4      : num  578 370 346.7 98.2 233.7 ...
##  $ oPO4     : num  105 428.8 125.7 61.2 58.2 ...
##  $ PO4      : num  170 558.8 187.1 138.7 97.6 ...
##  $ Chla     : num  50 1.3 15.6 1.4 10.5 ...
##  $ a1       : num  0 1.4 3.3 3.1 9.2 15.1 2.4 18.2 25.4 17 ...
##  $ a2       : num  0 7.6 53.6 41 2.9 14.6 1.2 1.6 5.4 0 ...
##  $ a3       : num  0 4.8 1.9 18.9 7.5 1.4 3.2 0 2.5 0 ...
##  $ a4       : num  0 1.9 0 0 0 0 3.9 0 0 2.9 ...
##  $ a5       : num  34.2 6.7 0 1.4 7.5 22.5 5.8 5.5 0 0 ...
##  $ a6       : num  8.3 0 0 0 4.1 12.6 6.8 8.7 0 0 ...
##  $ a7       : num  0 2.1 9.7 1.4 1 2.9 0 0 0 1.7 ...

NOTA: Indicar si hay alguna discrepancia entre las clases de las variables en el data set y su significado, p. ej., fechas que no están almacenadas como fechas, sino como factores, etc.

No hay discrepancias porque las primeras tres variables tienen los factores correctos y las últimas variables son todas numéricas.

Observaciones

head(ds)
##   temporada tamano velocidad mxPH mnO2     Cl    NO3     NO4    oPO4
## 1    winter  small    medium 8.00  9.8 60.800  6.238 578.000 105.000
## 2    spring  small    medium 8.35  8.0 57.750  1.288 370.000 428.750
## 3    autumn  small    medium 8.10 11.4 40.020  5.330 346.667 125.667
## 4    spring  small    medium 8.07  4.8 77.364  2.302  98.182  61.182
## 5    autumn  small    medium 8.06  9.0 55.350 10.416 233.700  58.222
## 6    winter  small      high 8.25 13.1 65.750  9.248 430.000  18.250
##       PO4 Chla   a1   a2   a3  a4   a5   a6  a7
## 1 170.000 50.0  0.0  0.0  0.0 0.0 34.2  8.3 0.0
## 2 558.750  1.3  1.4  7.6  4.8 1.9  6.7  0.0 2.1
## 3 187.057 15.6  3.3 53.6  1.9 0.0  0.0  0.0 9.7
## 4 138.700  1.4  3.1 41.0 18.9 0.0  1.4  0.0 1.4
## 5  97.580 10.5  9.2  2.9  7.5 0.0  7.5  4.1 1.0
## 6  56.667 28.4 15.1 14.6  1.4 0.0 22.5 12.6 2.9
tail(ds)
##     temporada tamano velocidad mxPH mnO2      Cl   NO3     NO4   oPO4
## 194    autumn  large    medium 8.58 11.1  23.825 3.617  72.600 51.111
## 195    summer  large    medium 8.50  7.9  12.444 2.586  96.667 19.111
## 196    autumn  large    medium 8.40  8.4  17.375 3.833  83.750 53.625
## 197    spring  large    medium 8.30 10.6  14.320 3.200 125.333 35.333
## 198    autumn  large    medium 8.20  7.0 139.989 2.978  60.110 78.333
## 200    summer  large    medium 8.50  6.7  82.852 2.800  27.069 64.000
##         PO4   Chla   a1   a2  a3  a4  a5   a6  a7
## 194  91.111 22.900  3.8 22.0 2.9 0.0 3.1  5.5 0.0
## 195  61.444  6.167 18.9 13.2 5.0 0.0 6.1  0.0 0.0
## 196  79.750  2.338 12.7 21.7 5.6 0.0 1.0  0.0 0.0
## 197  75.904  4.667 18.0  7.0 1.7 0.0 4.8 10.3 1.0
## 198 140.220 31.738  0.0 15.9 2.4 1.0 0.0  0.0 0.0
## 200 140.517 18.300  2.4 10.5 9.0 7.8 0.0  0.0 5.8
# Show a random sample of (at most) six observations. Row indices are sampled
# from 1..nrow(ds); sampling `ds` itself would draw columns, not rows.
ds[sample.int(nrow(ds), min(6L, nrow(ds))), ]

Sumario Estadístico

summary(ds)
##   temporada     tamano    velocidad       mxPH           mnO2       
##  autumn:40   large :44   high  :84   Min.   :5.60   Min.   : 1.500  
##  spring:53   medium:84   low   :33   1st Qu.:7.70   1st Qu.: 7.800  
##  summer:44   small :70   medium:81   Median :8.06   Median : 9.800  
##  winter:61                           Mean   :8.02   Mean   : 9.125  
##                                      3rd Qu.:8.40   3rd Qu.:10.800  
##                                      Max.   :9.70   Max.   :13.400  
##                                      NA's   :1      NA's   :1       
##        Cl               NO3              NO4                oPO4       
##  Min.   :  0.222   Min.   : 0.050   Min.   :    5.00   Min.   :  1.00  
##  1st Qu.: 10.981   1st Qu.: 1.296   1st Qu.:   38.33   1st Qu.: 15.70  
##  Median : 32.730   Median : 2.675   Median :  103.17   Median : 40.15  
##  Mean   : 43.636   Mean   : 3.282   Mean   :  501.30   Mean   : 73.59  
##  3rd Qu.: 57.824   3rd Qu.: 4.446   3rd Qu.:  226.95   3rd Qu.: 99.33  
##  Max.   :391.500   Max.   :45.650   Max.   :24064.00   Max.   :564.60  
##  NA's   :8                                                             
##       PO4             Chla               a1               a2        
##  Min.   :  1.0   Min.   :  0.200   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 43.5   1st Qu.:  2.000   1st Qu.: 1.525   1st Qu.: 0.000  
##  Median :104.0   Median :  5.475   Median : 6.950   Median : 3.000  
##  Mean   :138.5   Mean   : 13.971   Mean   :16.996   Mean   : 7.471  
##  3rd Qu.:214.0   3rd Qu.: 18.308   3rd Qu.:24.800   3rd Qu.:11.275  
##  Max.   :771.6   Max.   :110.456   Max.   :89.800   Max.   :72.600  
##  NA's   :1       NA's   :10                                         
##        a3               a4               a5               a6        
##  Min.   : 0.000   Min.   : 0.000   Min.   : 0.000   Min.   : 0.000  
##  1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.000   1st Qu.: 0.000  
##  Median : 1.550   Median : 0.000   Median : 2.000   Median : 0.000  
##  Mean   : 4.334   Mean   : 1.997   Mean   : 5.116   Mean   : 6.005  
##  3rd Qu.: 4.975   3rd Qu.: 2.400   3rd Qu.: 7.500   3rd Qu.: 6.975  
##  Max.   :42.800   Max.   :44.600   Max.   :44.400   Max.   :77.600  
##                                                                     
##        a7        
##  Min.   : 0.000  
##  1st Qu.: 0.000  
##  Median : 1.000  
##  Mean   : 2.487  
##  3rd Qu.: 2.400  
##  Max.   :31.600  
## 

Limpieza de metadatos

library(stringr)
# Usaremos la función que hiciste de ejercicio
names(ds) <- normalizarNombres(names(ds))

Además de normalizar los nombres de las variables, este es el lugar para asignar nombres con significado; por ejemplo, que la columna que contiene datos de fecha se llame fecha o date.

names(ds)
##  [1] "temporada" "tamano"    "velocidad" "mx.pH"     "mn.o2"    
##  [6] "Cl"        "NO3"       "NO4"       "o.pO4"     "PO4"      
## [11] "Chla"      "a1"        "a2"        "a3"        "a4"       
## [16] "a5"        "a6"        "a7"

Ajuste de formatos

Las clases de las variables son

sapply(ds, class)
## temporada    tamano velocidad     mx.pH     mn.o2        Cl       NO3 
##  "factor"  "factor"  "factor" "numeric" "numeric" "numeric" "numeric" 
##       NO4     o.pO4       PO4      Chla        a1        a2        a3 
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
##        a4        a5        a6        a7 
## "numeric" "numeric" "numeric" "numeric"

En esta sección arreglamos los formatos de los datos. Un ejemplo típico son las fechas.

Otros problemas con variables son: categóricas/numéricas que no lo son, booleanas que no lo son, ordenar variables nominales, reetiquetar las variables categóricas, etc.

Para arreglar las fechas, utiliza el paquete lubridate.

El formato de fechas debe ser YMD y, si es un timestamp, debe conservar exactamente la precisión que den los datos: ni más, ni menos.

# Ejemplo hipotético

ds$fecha <- ymd(as.character(ds$fecha))

NOTA: Es recomendable hacer todas las transformaciones en un solo mutate y no una por una (a menos que haya problemas de memoria, y hay que usar otras técnicas).

Así quedan las variables corregidas:

sapply(ds, class)  
## temporada    tamano velocidad     mx.pH     mn.o2        Cl       NO3 
##  "factor"  "factor"  "factor" "numeric" "numeric" "numeric" "numeric" 
##       NO4     o.pO4       PO4      Chla        a1        a2        a3 
## "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" "numeric" 
##        a4        a5        a6        a7 
## "numeric" "numeric" "numeric" "numeric"

Transformación de variables

En esta sección incluimos la transformación de las variables necesarias (normalización, estandarización, binning, log, etc.).

NOTA: Es recomendable hacer todas las transformaciones en un solo mutate y no una por una (a menos que haya problemas de memoria, y hay que usar otras técnicas).

Identificación de variables

# Keep the column names of the data set for later bookkeeping.
vars <- names(ds)

# Name of the target column (only relevant when the model is supervised).
target <- ""
# If provided: importance of the observation with respect to the target
# (this is an output variable).
risk <- ""
# If provided: cost of a wrong prediction (this is an output variable).
costo <- ""
# Identifier: either built from several columns or taken from the data set.
id <- ""

Recodificación

Antes de pasar a la etapa de ignorar variables, es importante recodificar.

Variables a ignorar

Identificamos en una variable las columnas a ignorar en el entrenamiento del modelo.

IDs y variables de salida

# Collect the identifier and the output-only columns (risk, cost) that must be
# left out of training. union() accepts exactly two sets, so the pieces are
# concatenated with c() instead; setdiff() de-duplicates the result and drops
# the "" placeholders of roles that were not filled in.
vars.a.ignorar <- setdiff(
  c(id, if (exists("risk")) risk, if (exists("costo")) costo),
  ""
)

Constantes y valores únicos por observación

# Columns holding a different value in every observation behave like IDs.
# IMPORTANT: this can also discard dates (see the previous section), and any
# other column whose values happen to be all distinct.

ids <- names(which(
  vapply(ds, function(x) length(unique(x)) == nrow(ds), logical(1))
))

# Factors with many levels are ignored as well.
# IMPORTANT: see the previous section.

factors <- which(vapply(ds[vars], is.factor, logical(1)))
# Count levels on the same column subset the indices were computed from.
niveles <- vapply(ds[vars][factors], nlevels, integer(1))
(muchos.niveles <- names(which(niveles > 20)))

vars.a.ignorar <- union(vars.a.ignorar, muchos.niveles)

# Constant columns: every value equals the first one. A comparison that
# yields NA (missing values present) is dropped by which().
constantes <- names(which(
  vapply(ds[vars], function(x) all(x == x[1L]), logical(1))
))

# union() takes exactly two sets, so the three name vectors are merged with
# c() and de-duplicated.
vars.a.ignorar <- unique(c(vars.a.ignorar, ids, constantes))

Faltantes

# Number of missing values in each tracked column.
ids.nas.count <- vapply(
  ds[vars],
  function(columna) sum(is.na(columna)),
  integer(1)
)

# Columns made up exclusively of NAs.
ids.nas <- names(ids.nas.count)[ids.nas.count == nrow(ds)]

# Disabled on purpose: uncomment to ignore the all-NA columns.
#vars.a.ignorar <- union(ids.nas, vars.a.ignorar)

# Columns where 70% or more of the observations are NA.
ids.many.nas <- names(ids.nas.count)[ids.nas.count >= 0.7 * nrow(ds)]

# Disabled on purpose: uncomment to ignore the mostly-NA columns.
#vars.a.ignorar <- union(ids.many.nas, vars.a.ignorar)

Variable de salida (target)

Si el problema de minado es supervisado, removemos las observaciones que tengan NA en la variable target.

# Dimensions before and after dropping the observations whose target is NA.
dim(ds)
# Only filter when a target column has been declared; with the "" placeholder
# there is nothing to filter on. `[[` extracts the column as a vector, so the
# rows are selected with a plain logical vector.
if (isTRUE(nzchar(target))) {
  ds <- ds[!is.na(ds[[target]]), , drop = FALSE]
}
dim(ds)

Si el problema es de clasificación, hay que convertir la variable target a categórica.

# Classification only: store the target as a factor and show its frequency
# table. Skipped while `target` is still the "" placeholder, where the
# conversion would fail.
if (isTRUE(nzchar(target))) {
  ds[[target]] <- as.factor(ds[[target]])
  print(table(ds[target]))
}

Mostramos la distribución (esto nos indicará si el problema no está balanceado)

# Bar chart of the target's distribution; `target` holds the column name as a
# string, hence aes_string().
ggplot(ds, aes_string(x = target)) +
  geom_bar(width = 0.3)

Variables correlacionadas

# Correlation matrix of the numeric columns, computed on complete observations.
vars.cor <- cor(ds[which(sapply(ds, is.numeric))], use="complete.obs")
# Blank out the upper triangle and the diagonal so that each pair of
# variables is kept only once and self-correlations are discarded.
vars.cor[upper.tri(vars.cor, diag=TRUE)] <- NA

# Reshape the matrix into long format (var1, var2, cor) holding absolute
# correlations; the NA cells blanked above are removed by na.omit().
# NOTE(review): plyr was attached after dplyr, so `mutate` presumably resolves
# to plyr's version here -- confirm.
vars.cor <- vars.cor                                  %>%
            abs()                                     %>%   
            data.frame()                              %>%
            mutate(var1=row.names(vars.cor))          %>%
            gather(var2, cor, -var1)                  %>%
            na.omit()
            

# Sort the pairs from the strongest to the weakest correlation.
vars.cor <- vars.cor[order(-abs(vars.cor$cor)), ]

(muy.cor <- filter(vars.cor, cor > 0.95)) # Show the pairs whose absolute correlation exceeds 0.95
## [1] var1 var2 cor 
## <0 rows> (or 0-length row.names)
# It still has to be decided whether highly correlated variables are removed
# and which member of each pair goes (var1 or var2); var2 is used here.
# Extend the ignore list instead of overwriting it, so the columns flagged in
# earlier sections are kept. as.character() guards against var2 being a factor.
vars.a.ignorar <- unique(c(
  if (exists("vars.a.ignorar")) vars.a.ignorar,
  as.character(muy.cor$var2)
))

NOTA: ¿Qué pasa con las categóricas? ¿Usamos asociación o independencia?

Valores faltantes

En esta sección hay que poner la estrategia de manejo de valores faltantes elegida durante la etapa del EDA.

# Names of the categorical columns of the data set.
ListaCategoricas <- c(
  "temporada",
  "tamano",
  "velocidad"
)

# Most frequent value of `x`. Ties are resolved in favour of the value that
# appears first in `x`; NA is counted like any other value.
Mode <- function(x) {
  distinct <- unique(x)
  counts <- tabulate(match(x, distinct))
  distinct[which.max(counts)]
}

# Impute the missing values of the selected columns and return the data frame.
#
# data:     data frame to impute.
# colnames: character vector with the names of the columns to impute.
#
# Columns whose class is exactly "numeric" get their NAs replaced by random
# draws from a normal distribution with the column's observed mean and
# standard deviation. Any other column gets the most frequent observed
# (non-NA) value, computed with Mode().
#
# Returns `data` with the imputed columns. The function previously ended on
# the `for` loop and therefore returned NULL, discarding all the work.
imputarValorCentral <- function(data, colnames) {
  stopifnot(all(colnames %in% names(data)))

  for (col in colnames) {
    valores <- data[[col]]
    faltantes <- is.na(valores)
    if (!any(faltantes)) {
      next
    }

    # identical() keeps the condition scalar even for multi-class columns
    # (e.g. ordered factors), where `class(x) == "numeric"` is not.
    if (identical(class(valores), "numeric")) {
      valores[faltantes] <- rnorm(
        sum(faltantes),
        mean(valores, na.rm = TRUE),
        sd(valores, na.rm = TRUE)
      )
    } else {
      # Exclude NAs so that NA itself can never be chosen as the fill value.
      valores[faltantes] <- Mode(valores[!faltantes])
    }
    data[[col]] <- valores
  }

  data
}

# Scatter plot of PO4 against o.pO4 with a fitted straight line.
ggplot(ds, aes(x = o.pO4, y = PO4)) +
  geom_point(shape = 1) + # shape 1 draws hollow circles
  geom_smooth(method = lm, se = FALSE) # linear fit, no confidence band
## Warning: Removed 1 rows containing missing values (stat_smooth).
## Warning: Removed 1 rows containing missing values (geom_point).

# Flag the observations with a missing PO4 value.
na.ind <- is.na(ds[["PO4"]])
# Simple linear regression of PO4 on o.pO4, later used to fill those gaps.
modelo <- lm(PO4 ~ o.pO4, data = ds)
summary(modelo)
## 
## Call:
## lm(formula = PO4 ~ o.pO4, data = ds)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -110.12  -36.34  -12.68   23.26  216.98 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)    
## (Intercept)   42.897      4.808   8.922 3.34e-16 ***
## o.pO4          1.293      0.041  31.535  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 52.37 on 195 degrees of freedom
##   (1 observation deleted due to missingness)
## Multiple R-squared:  0.8361, Adjusted R-squared:  0.8352 
## F-statistic: 994.5 on 1 and 195 DF,  p-value: < 2.2e-16
# Predict PO4 for the rows where it is missing and write the predictions back.
pred <- predict(modelo, newdata = ds[na.ind, , drop = FALSE])
ds$PO4[na.ind] <- pred

Hay muy pocas ocasiones en las que es recomendable dejar que el modelo se encargue de las imputaciones.

Las observaciones a omitir, guárdalas en observaciones.omitidas.

Normalizar niveles

Removemos espacios, puntuaciones, camelCase, etc. en los niveles de los factores supervivientes.

# Positions of the factor columns among the tracked variables.
factors <- which(vapply(ds[vars], is.factor, logical(1)))
# Rewrite the levels of every factor with their normalized spelling.
for (f in factors) {
  levels(ds[[f]]) <- normalizarNombres(levels(ds[[f]]))
}

Identificación de Variables

(vars.input <- setdiff(vars, target))
##  [1] "temporada" "tamano"    "velocidad" "mx.pH"     "mn.o2"    
##  [6] "Cl"        "NO3"       "NO4"       "o.pO4"     "PO4"      
## [11] "Chla"      "a1"        "a2"        "a3"        "a4"       
## [16] "a5"        "a6"        "a7"
# Column positions of the input variables. match() always returns one integer
# per name (NA when the name is absent), whereas sapply() over which() turns
# into a list as soon as a name is missing or duplicated.
idxs.input <- match(vars.input, names(ds))

# Input variables stored as numeric columns.
idxs.numericas <- intersect(
  idxs.input,
  which(vapply(ds, is.numeric, logical(1)))
)
(vars.numericas <- names(ds)[idxs.numericas])
##  [1] "mx.pH" "mn.o2" "Cl"    "NO3"   "NO4"   "o.pO4" "PO4"   "Chla" 
##  [9] "a1"    "a2"    "a3"    "a4"    "a5"    "a6"    "a7"
# Input variables stored as factors: positions first, then names (printed).
idxs.categoricas <- intersect(
  idxs.input,
  which(vapply(ds, is.factor, logical(1)))
)
vars.categoricas <- names(ds)[idxs.categoricas]
vars.categoricas
## [1] "temporada" "tamano"    "velocidad"
# Por conveniencia guardamos el número de observaciones supervivientes
num.observaciones <- nrow(ds)

Apéndice: Ambiente

## R version 3.1.1 (2014-07-10)
## Platform: x86_64-apple-darwin10.8.0 (64-bit)
## 
## locale:
## [1] es_ES.UTF-8/es_ES.UTF-8/es_ES.UTF-8/C/es_ES.UTF-8/es_ES.UTF-8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] rattle_3.3.0       RColorBrewer_1.0-5 arm_1.7-07        
##  [4] lme4_1.1-7         Rcpp_0.11.3        Matrix_1.1-4      
##  [7] MASS_7.3-35        scales_0.2.4       reshape2_1.4      
## [10] plyr_1.8.1         stringr_0.6.2      lubridate_1.3.3   
## [13] ggplot2_1.0.0      tidyr_0.1          dplyr_0.3.0.2     
## 
## loaded via a namespace (and not attached):
##  [1] abind_1.4-0      assertthat_0.1   coda_0.16-1      colorspace_1.2-4
##  [5] DBI_0.3.1        digest_0.6.4     evaluate_0.5.5   formatR_1.0     
##  [9] grid_3.1.1       gtable_0.1.2     htmltools_0.2.6  knitr_1.7       
## [13] labeling_0.3     lattice_0.20-29  lazyeval_0.1.9   magrittr_1.0.1  
## [17] memoise_0.2.1    minqa_1.2.4      munsell_0.4.2    nlme_3.1-118    
## [21] nloptr_1.0.4     parallel_3.1.1   proto_0.3-10     rmarkdown_0.3.10
## [25] splines_3.1.1    tools_3.1.1      yaml_2.1.13